--- toc: true title: Scooter Exploration image: images/company_logo.png keywords: fastai sidebar: home_sidebar summary: "In this chapter Scooter data is explored" description: "In this chapter Scooter data is explored" nb_path: "notebooks/02_scooterExploration.ipynb" ---
{% raw %}
{% endraw %} {% raw %}
 
{% endraw %} {% raw %}
 
{% endraw %} {% raw %}
 
{% endraw %}

Introduction

Binder Binder Binder Open Source Love svg3

NPM License Active Python Versions GitHub last commit No Maintenance Intended

GitHub stars GitHub watchers GitHub forks GitHub followers

Tweet Twitter Follow

This chapter was created in collaboration by Brian Kelly, Michael Vandi, Logan Shertz, and Charles Karpati.

Dataset: Scooter data:

  • Routes: 5 months (August to December 2019)
  • Deployment/
  • Routes
  • Trip origins-destinations by month

Local File Access (Optional)

{% raw %}
# (Optional) Run this cell to gain access to Google Drive (Colab only).
from google.colab import drive

# Colab operates in a virtualized environment.
# Colab's default working directory is ~/content.
# We mount Drive into a temporary folder at '~/content/drive'.

drive.mount('/content/drive')
{% endraw %}

File paths will vary

{% raw %}
 
{% endraw %} {% raw %}
 
/content/drive/My Drive/DATA/scooter
{% endraw %} {% raw %}
# cd ./drive/'My Drive'/BNIA/'Scooter Use Data'/BNIA
{% endraw %} {% raw %}
cd ./Routes
/content/drive/My Drive/DATA/scooter/Routes
{% endraw %} {% raw %}
! ls
{% endraw %} {% raw %}
!cd ../ && ls
{% endraw %}

Installs

{% raw %}
%%capture
! pip install dexplot
!pip install folium
!pip install geopandas
!pip install ipyleaflet
!pip install gpdvega
!pip install dataplay
{% endraw %}

Imports

{% raw %}
%%capture
import os
import pandas as pd
import geopandas as gpd
import dexplot as dxp
import folium as fol
import json
import altair as alt
import gpdvega
import dataplay
# These imports will handle everything
import os
import sys
import csv
from IPython.display import clear_output
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
import psycopg2
import pyproj
from pyproj import Proj, transform
# conda install -c conda-forge proj4
from shapely.geometry import Point
from shapely import wkb
from shapely.wkt import loads
# https://pypi.org/project/geopy/
from geopy.geocoders import Nominatim
import folium
from folium import plugins

from dataplay.merge import mergeDatasets 

import dexplot as dxp

# In case file is KML, enable support
import fiona
fiona.drvsupport.supported_drivers['kml'] = 'rw'
fiona.drvsupport.supported_drivers['KML'] = 'rw'

#this cell is good to copy
from shapely.geometry import LineString
pd.plotting.register_matplotlib_converters()

from dataplay.geoms import readInGeometryData
from shapely import wkt
from dataplay.geoms import readInGeometryData

import ipywidgets as widgets
!jupyter nbextension enable --py widgetsnbextension
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
alt.data_transformers.enable('default', max_rows=None)
{% endraw %}

Convenience Functions

{% raw %}
def split(geom):
  """Return the endpoint coordinates (left_x, left_y, right_x, right_y) of an
  open LineString's boundary.

  For anything that is not an open LineString — a non-LineString geometry, or
  a closed ring whose boundary is 'MULTIPOINT EMPTY' — returns
  (False, type(geom), False, type(geom)) so callers can detect the failure
  and see the offending datatype.
  """
  print( str( type(geom)) )
  # isinstance is more robust than comparing the str() of the type.
  if isinstance(geom, LineString) and not str(geom.boundary) == 'MULTIPOINT EMPTY':
    print(geom.boundary)
    # Unpack via .geoms: required on shapely 2.x, where a MultiPoint is no
    # longer directly iterable (works on shapely >= 1.8 as well).
    left, right = geom.boundary.geoms
    print('left x: ', type(left.x), left.x)
    print('left y: ', type(left.y), left.y)
    print('right x: ', type(right.x), right.x)
    print('right y: ', type(right.y), right.y)
    return left.x, left.y, right.x, right.y
  else:
    return False, type(geom), False, type(geom)
{% endraw %} {% raw %}
def getPointsInPolygons(pts, polygons, ptsCoordCol, polygonsCoordCol): 
    """Count, for each polygon, how many of the given points fall inside it.

    Parameters
    ----------
    pts : (Geo)DataFrame of point records. Matched rows are removed from the
        candidate pool, so each point is counted for at most one polygon.
    polygons : (Geo)DataFrame of polygon records; gains a new
        'pointsinpolygon' column and is returned.
    ptsCoordCol, polygonsCoordCol : names of the geometry columns in
        `pts` and `polygons` respectively.

    Returns
    -------
    `polygons` with the added 'pointsinpolygon' count column.
    """
    count = 0
    # Equivalent to pts.size / len(pts.columns), just clearer.
    total = len(pts)
    # Per-polygon hit counts, in polygon iteration order.
    pts_in_polys = []

    # Loop over polygons with index i.
    for i, poly in polygons.iterrows():
        # Keep a list of points in this poly.
        pts_in_this_poly = []

        # Now loop over all remaining points with index j.
        for j, pt in pts.iterrows():
          if poly[polygonsCoordCol].contains(pt[ptsCoordCol]):
            # It's a hit! Record it, and drop it from the pool so later
            # polygons have less hunting to do.
            pts_in_this_poly.append(pt[ptsCoordCol])
            pts = pts.drop([j])

        pts_in_polys.append(len(pts_in_this_poly))
        print('Found this many points within the Geom:', len(pts_in_this_poly) ) 
        count = count + len(pts_in_this_poly) 
        clear_output(wait=True)

    # Assign as a plain list: assignment is positional, so it cannot misalign
    # when `polygons` has a non-default index (the original wrapped this in a
    # gpd.GeoSeries, which aligns on index labels and holds ints, not geoms).
    polygons['pointsinpolygon'] = pts_in_polys
    print( 'Total Points: ', total )
    print( 'Total Points in Polygons: ', count )
    # Guard against an empty pts frame to avoid ZeroDivisionError.
    print( 'Prcnt Points in Polygons: ', count / total if total else 0 )
    return polygons
{% endraw %}

File Access Convenience Functions

{% raw %}
def findFile(root, file):
    """Walk `root` and return '<dir>/<file>' for the first directory that
    contains `file`; return None when the file is not found anywhere."""
    for d, subD, f in os.walk(root):
        if file in f:
            # (the original had an unreachable `break` after this return)
            return "{1}/{0}".format(file, d)
    return None

# To 'import' a script you wrote, map its containing directory into sys.path.
def addPath(root, file):
    """Locate `file` under `root` and append its containing directory to
    sys.path so the script can be imported."""
    # Fix: the original hard-coded './' (ignoring `root`) and appended the
    # file path itself; sys.path entries must be directories.
    found = findFile(root, file)
    if found:
        sys.path.append(os.path.abspath(os.path.dirname(found)))
{% endraw %} {% raw %}
findFile('./', 'Routing September 2019.geojson')
'.//Routing September 2019.geojson'
{% endraw %} {% raw %}
ls
 leftRouts.csv                   'Routing November 2019.geojson'
 rightRouts.csv                  'Routing October 2019.geojson'
'Routing August 2019.geojson'    'Routing September 2019.geojson'
'Routing December 2019.geojson'
{% endraw %}

Inspect Deployment Data

{% raw %}
ls 'Trip origins-destinations by month'
'Trip Destinations by block August 2019.geojson'
'Trip Destinations by block December 2019.geojson'
'Trip Destinations by block November 2019.geojson'
'Trip Destinations by block October 2019.geojson'
'Trip Destinations by block September 2019.geojson'
'Trip Origins by block August 2019.geojson'
'Trip Origins by block December 2019.geojson'
'Trip Origins by block November 2019.geojson'
'Trip Origins by block October 2019.geojson'
'Trip Origins by block September 2019.geojson'
{% endraw %} {% raw %}
#@markdown Forms support many types of fields.
fileName = "Trip Origins by block September 2019.geojson"  #@param ['Daily Deployment average by block August 2019.csv', 'Daily Deployment average by block December 2019.csv', 'Daily Deployment average by block November 2019.csv', 'Daily Deployment average by block October 2019.csv', 'Daily Deployment average by block September 2019.csv', 'Trip Destinations by block August 2019.geojson','Trip Destinations by block December 2019.geojson','Trip Destinations by block November 2019.geojson', 'Trip Destinations by block October 2019.geojson', 'Trip Destinations by block September 2019.geojson', 'Trip Origins by block August 2019.geojson','Trip Origins by block December 2019.geojson','Trip Origins by block November 2019.geojson','Trip Origins by block October 2019.geojson','Trip Origins by block September 2019.geojson']
#@markdown ---
{% endraw %} {% raw %}
gdf = gpd.read_file( findFile('./', fileName) )
gdf.head()
id name color radius value geometry
0 1 Block 245102502063018 #fff 4.999286 NaN POLYGON ((-76.66168 39.26342, -76.66136 39.26373, -76.66096 39.26343, -76.66041 39.26306, -76.66053 39.26296, -76.66070 39.26281, -76.66077 39.26284, -76.66100 39.26296, -76.66117 39.26306, -76.66129 39.26315, -76.66141 39.26322, -76.66168 39.26342))
1 2 Block 245102006001022 #fff 4.999286 NaN POLYGON ((-76.66319 39.28427, -76.66306 39.28436, -76.66301 39.28439, -76.66293 39.28439, -76.66271 39.28418, -76.66303 39.28402, -76.66317 39.28417, -76.66320 39.28423, -76.66319 39.28427))
2 3 Block 245102804033017 #fff 4.999286 NaN POLYGON ((-76.71122 39.27927, -76.71121 39.27975, -76.71121 39.27990, -76.71119 39.28076, -76.71045 39.27932, -76.71045 39.27920, -76.71056 39.27907, -76.71123 39.27880, -76.71122 39.27927))
3 4 Block 245102716006003 #fff 4.999286 NaN POLYGON ((-76.66960 39.34512, -76.66905 39.34526, -76.66886 39.34531, -76.66848 39.34539, -76.66835 39.34502, -76.66867 39.34498, -76.66924 39.34484, -76.66946 39.34475, -76.66960 39.34512))
4 5 Block 245102709023013 #fff 4.999286 NaN POLYGON ((-76.58976 39.35401, -76.58972 39.35415, -76.58966 39.35435, -76.58964 39.35447, -76.58960 39.35469, -76.58940 39.35467, -76.58925 39.35465, -76.58912 39.35465, -76.58902 39.35464, -76.58898 39.35464, -76.58906 39.35425, -76.58917 39.35385, -76.58925 39.35385, -76.58939 39.35386, -76.58979 39.35393, -76.58976 39.35401))
{% endraw %} {% raw %}
gdf.plot(column = 'value')
<matplotlib.axes._subplots.AxesSubplot at 0x7f2c97160da0>
{% endraw %} {% raw %}
point_df = gdf.copy()
# Fix: the original referenced `df.centroid`, but `df` is undefined here —
# the centroids come from `gdf`.
point_df['centroids'] = gdf.centroid
point_df = point_df.drop(columns = 'geometry')
point_df = point_df.set_geometry('centroids')
point_df.head(1)
point_df.plot(marker='o', color='red', markersize='value')
{% endraw %} {% raw %}
point_df.value.hist()
{% endraw %} {% raw %}
point_df[point_df.value > 1000].plot()
{% endraw %} {% raw %}
baltimore = gpd.read_file("https://opendata.arcgis.com/datasets/b738a8587b6d479a8824d937892701d8_0.geojson")
{% endraw %} {% raw %}
from dataplay.geoms import workWithGeometryData
{% endraw %} {% raw %}
baltimore.columns
Index(['OBJECTID', 'CSA2010', 'tpop10', 'male10', 'female10', 'paa17',
       'pwhite17', 'pasi17', 'p2more17', 'ppac17', 'phisp17', 'racdiv17',
       'age5_17', 'age18_17', 'age24_17', 'age64_17', 'age65_17', 'hhs10',
       'femhhs17', 'fam17', 'hhsize10', 'mhhi17', 'hh25inc17', 'hh40inc17',
       'hh60inc17', 'hh75inc17', 'hhm7517', 'hhpov17', 'hhchpov17',
       'Shape__Area', 'Shape__Length', 'geometry'],
      dtype='object')
{% endraw %} {% raw %}
pointsInPolys = workWithGeometryData(method = 'ponp', df = point_df, polys = baltimore, ptsCoordCol ='centroids', 
                                     polygonsCoordCol = 'geometry', polygonsLabel = 'CSA2010')
pointsInPolys.plot(column='value', legend=True, markersize = 1)
Total Points:  13506.0
Total Points in Polygons:  13453
Prcnt Points in Polygons:  0.9960758181548941
<matplotlib.axes._subplots.AxesSubplot at 0x7ff7c35c92e8>
{% endraw %} {% raw %}
ponp_copy = pointsInPolys.copy()
{% endraw %} {% raw %}
ponp_copy.value.isnull().groupby([ponp_copy['CSA2010']]).sum().astype(int).reset_index(name='NumberMissingCount').head()
CSA2010 NumberMissingCount
0 Allendale/Irvington/S. Hilton 290
1 Beechfield/Ten Hills/West Hills 180
2 Belair-Edison 250
3 Brooklyn/Curtis Bay/Hawkins Point 580
4 Canton 0
{% endraw %} {% raw %}
ponp_copy.value.notnull().groupby([ponp_copy['CSA2010']]).sum().astype(int) / ponp_copy.value.isnull().groupby([ponp_copy['CSA2010']]).sum().astype(int)
{% endraw %} {% raw %}
ponp_copy.value.isnull().groupby([ponp_copy['CSA2010']]).sum().astype(int) / ponp_copy.value.groupby([ponp_copy['CSA2010']]).sum().astype(int) * 100
{% endraw %} {% raw %}
ponp_copy.fillna(-1).groupby('CSA2010')['value'].sum()
{% endraw %} {% raw %}
gdf.value.unique()
{% endraw %} {% raw %}
gdf.value.describe()
count    5677.000000
mean     40.862603  
std      175.769408 
min      1.000000   
25%      2.000000   
50%      5.000000   
75%      21.000000  
max      6978.000000
Name: value, dtype: float64
{% endraw %} {% raw %}
gdf.value.var() # Return unbiased variance over requested axis.
30894.884924369213
{% endraw %} {% raw %}
gdf.value.sem() # Return unbiased standard error of the mean over requested axis.
2.332834040372481
{% endraw %} {% raw %}
gdf.value.nunique()	# Count distinct observations over requested axis.
364
{% endraw %} {% raw %}
gdf.shape

#DataFrame.size	Return an int representing the number of elements in this object.
gdf.size

# DataFrame.ndim	Return an integer representing the number of axes/array dimensions.
gdf.ndim

# Note Used : 
# DataFrame.axes	Return a list representing the axes of the DataFrame.

gdf.dtypes

# Return unbiased kurtosis over requested axis using Fisher’s definition of kurtosis (kurtosis of normal == 0.0).
gdf.kurtosis()
(13506, 6)
81036
2
id          object  
name        object  
color       object  
radius      float64 
value       float64 
geometry    geometry
dtype: object
id       -1.200000   
radius    1132.851562
value     495.784213 
dtype: float64
{% endraw %}

Load Tracts

{% raw %}
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

pd.set_option('max_colwidth', 20)
{% endraw %} {% raw %}
in_crs = 2248 # The CRS we recieve our data.
out_crs = 4326 # The CRS we would like to have our data represented as.
geom = 'geometry' # The column where our spatial information lives.

# A Url to load
boundariesBaltimoreTractsNoWater2010 = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ8xXdUaT17jkdK0MWTJpg3GOy6jMWeaXTlguXNjCSb8Vr_FanSZQRaTU-m811fQz4kyMFK5wcahMNY/pub?gid=886223646&single=true&output=csv"

# Read in the dataframe
gdf = pd.read_csv(boundariesBaltimoreTractsNoWater2010)

# Convert the geometry column datatype from a string of text into a coordinate datatype.
gdf['geometry'] = gdf['geometry'].apply(lambda x: loads( str(x) ))

# Process the dataframe as a geodataframe with a known CRS and geom column.
gdf = GeoDataFrame(gdf, crs=in_crs, geometry='geometry')
{% endraw %} {% raw %}
boundariesBaltimoreTractsNoWater2010.head()
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-27-1c0a7e8c6b97> in <module>()
----> 1 boundariesBaltimoreTractsNoWater2010.head()

AttributeError: 'str' object has no attribute 'head'
{% endraw %}

Ensure merge is on consistent datatypes

{% raw %}
gdf['GEOID10'] = gdf['GEOID10'].astype(str)
scooterdf['nameChange2'] = scooterdf['nameChange2'].astype(str)
{% endraw %}

Perform the merge

{% raw %}
scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-28-25b7a83381ef> in <module>()
----> 1 scooterdfClean = gdf.merge(scooterdf, left_on='GEOID10', right_on='nameChange2').drop(['name', 'nameChange1', 'nameChange2'], axis=1)

NameError: name 'scooterdf' is not defined
{% endraw %} {% raw %}
scooterdfClean.head()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-29-372a38a4735f> in <module>()
----> 1 scooterdfClean.head()

NameError: name 'scooterdfClean' is not defined
{% endraw %} {% raw %}
# gdf.drop(columns='geometry').to_csv('./boundsdf.csv', index=False)
{% endraw %} {% raw %}
ls
{% endraw %} {% raw %}
scooterdfClean.groupby('CSA')['value'].sum()
{% endraw %} {% raw %}
scooterdfClean.value.isna().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='notApplicable')
{% endraw %} {% raw %}
scooterdfClean.value.notnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NotMissingCount')
{% endraw %} {% raw %}
scooterdfClean.value.isnull().groupby([scooterdfClean['CSA']]).sum().astype(int).reset_index(name='NumberMissingCount')
{% endraw %} {% raw %}
scooterdfClean.fillna(-1).groupby('CSA')['value'].sum()
{% endraw %} {% raw %}
scooterdfClean.groupby('CSA')['value'].mean()
{% endraw %} {% raw %}
scooterdfClean.groupby('CSA')['value'].sum()
{% endraw %} {% raw %}
scooterdfClean.CSA.value_counts()
{% endraw %} {% raw %}
# Reference: https://pandas.pydata.org/docs/getting_started/intro_tutorials/06_calculate_statistics.html
# (a bare URL is a SyntaxError in a code cell, so it is kept as a comment)
{% endraw %}

Inspect Routes Data

{% raw %}
fileName = 'Routing December 2019.geojson'  #@param ['Routing August 2019.geojson', 'Routing October 2019.geojson', 'Routing December 2019.geojson', 'Routing September 2019.geojson', 'Routing November 2019.geojson']
columnName = "streetname"  #@param ['id', 'color', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent']
gdf = gpd.read_file( findFile('./', fileName) )
gdf.head()
id color streetname trip_count_sum trip_count_average trip_count_percent geometry
0 150197772 rgb(218, 231, 241) Jefferson Street 24 0.774194 0.0% LINESTRING (-76.58576 39.29660, -76.58550 39.29661)
1 150155955 rgb(164, 190, 219) 206 6.645161 0.3% LINESTRING (-76.61320 39.28136, -76.61318 39.28115)
2 150191673 rgb(204, 221, 236) Harford Avenue 49 1.580645 0.1% LINESTRING (-76.60169 39.30535, -76.60163 39.30544, -76.60156 39.30555)
3 150184657 rgb(229, 239, 246) 11 0.354839 0.0% LINESTRING (-76.61493 39.29294, -76.61491 39.29258, -76.61490 39.29230)
4 150188407 rgb(169, 194, 221) 178 5.741935 0.2% LINESTRING (-76.61662 39.29053, -76.61661 39.29046)
{% endraw %} {% raw %}
baltimore = gpd.read_file("https://opendata.arcgis.com/datasets/b738a8587b6d479a8824d937892701d8_0.geojson")
{% endraw %} {% raw %}
tst = """background = alt.Chart(gdf).mark_geoshape(
    stroke='white',
    strokeWidth=2
).encode(
    color=alt.value('#eee'),
).properties(
    width=700,
    height=500
)"""

# GeoDataFrame could be passed as usual pd.DataFrame
city = alt.Chart(baltimore).mark_geoshape(
).project(
).encode(
    color='tpop10', # shorthand infer types as for regular pd.DataFrame
    tooltip='CSA2010' # GeoDataFrame.index is accessible as id
).properties(
    width=500,
    height=300
)

routes = alt.Chart(gdf).mark_geoshape(
    filled=False,
    strokeWidth=2
)

city + routes
{% endraw %}

Clean the gdf of empties.

{% raw %}
# Drop rows whose geometry is missing or empty. Note: the original
# `gdf[~gdf.isna()]` masked with a boolean *DataFrame*, which NaNs-out
# individual cells instead of filtering rows; filter on the geometry column.
gdf = gdf[~gdf.geometry.isna()]
gdf = gdf[~gdf.is_empty]
{% endraw %}

Now get the extremities; this will take a minute.

{% raw %}
gdf['lefty'], gdf['leftx'],gdf['righty'], gdf['rightx'] = zip(*gdf["geometry"].map(split))
{% endraw %}

Split the gdf into a left and right dataset

{% raw %}
gdf_left = gdf.copy()
gdf_left = gdf_left.drop(columns = ['geometry','rightx', 'righty'])

gdf_right= gdf.copy()
gdf_right = gdf_right.drop(columns = ['geometry','leftx', 'lefty', 'streetname', 'trip_count_sum', 'trip_count_average', 'trip_count_percent', 'color' ])
{% endraw %}

The coordinate variables of object type will cause problems.

{% raw %}
gdf_right.dtypes
id        int64  
righty    float64
rightx    object 
dtype: object
{% endraw %}

Let's go ahead and coerce a correction.

{% raw %}
gdf_right['righty']=pd.to_numeric(gdf_right['righty'], errors='coerce')
gdf_left['lefty']=pd.to_numeric(gdf_left['lefty'], errors='coerce')

gdf_right['rightx']=pd.to_numeric(gdf_right['rightx'], errors='coerce')
gdf_left['leftx']=pd.to_numeric(gdf_left['leftx'], errors='coerce')
{% endraw %}

Now we can view the results

{% raw %}
gdf_right.dtypes
id        int64  
righty    float64
rightx    float64
dtype: object
{% endraw %}

Save these CSVs, because it took a while to compute the results up to this point.

{% raw %}
gdf_right.to_csv('rightRouts.csv')
gdf_left.to_csv('leftRouts.csv')
{% endraw %}

Convert the datasets to geodataframes for further exploration!

{% raw %}
#temp_gdf = gpd.GeoDataFrame( gdf_right, geometry=gpd.points_from_xy(gdf_right.rightx, gdf_right.righty) )
#temp_gdf.head()

# Alternately this could work.. unfinished.. but wkt.loads can make a Point from text
# gdf_right['strCol']=gdf_right['rightx'].astype(str)
# gdf_right['geometry'] = gdf_right['strCol'].apply(wkt.loads)
{% endraw %} {% raw %}
left_csaMap = readInGeometryData(url=gdf_left, porg='p', geom= False, lat= 'leftx', lng= 'lefty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)
{% endraw %} {% raw %}
right_csaMap = readInGeometryData(url=gdf_right, porg='p', geom= False, lat= 'rightx', lng= 'righty', revgeocode=False, save=False, in_crs=4268, out_crs=4268)
{% endraw %} {% raw %}
right_csaMap.head()
id righty rightx geometry
0 150197772 -76.585503 39.296607 POINT (-76.58550 39.29661)
1 150155955 -76.613183 39.281147 POINT (-76.61318 39.28115)
2 150191673 -76.601555 39.305545 POINT (-76.60156 39.30555)
3 150184657 -76.614899 39.292301 POINT (-76.61490 39.29230)
4 150188407 -76.616608 39.290458 POINT (-76.61661 39.29046)
{% endraw %} {% raw %}
left_csaMap.head()
{% endraw %} {% raw %}
left_csaMap.plot(column='trip_count_sum')
{% endraw %} {% raw %}
right_csaMap.plot()
{% endraw %} {% raw %}
baltimore.columns
{% endraw %} {% raw %}
right_points_in_poly = getPointsInPolygons(right_csaMap, baltimore, 'geometry', 'geometry')
{% endraw %} {% raw %}
right_csaMap.head()
{% endraw %} {% raw %}
left_points_in_poly = getPointsInPolygons(left_csaMap, baltimore, 'geometry', 'geometry')
{% endraw %} {% raw %}
left_csaMap.head()
{% endraw %} {% raw %}
left_points_in_poly.drop('geometry', axis=1)[['pointsinpolygon']].head(20)
pointsinpolygon
0 18
1 0
2 67
3 0
4 1536
5 18
6 32
7 8
8 1
9 65
10 0
11 0
12 13
13 1764
14 0
15 760
16 0
17 85
18 1253
19 2
{% endraw %} {% raw %}
right_points_in_poly.drop('geometry', axis=1)[['pointsinpolygon']].head(20)
pointsinpolygon
0 18
1 0
2 67
3 0
4 1536
5 18
6 32
7 8
8 1
9 65
10 0
11 0
12 13
13 1764
14 0
15 760
16 0
17 85
18 1253
19 2
{% endraw %} {% raw %}
left_points_in_poly.plot(column='pointsinpolygon', legend=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fca535fbbe0>
{% endraw %} {% raw %}
right_points_in_poly.plot(column='pointsinpolygon', legend=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fca51773978>
{% endraw %} {% raw %}
right_points_in_poly
{% endraw %} {% raw %}
scooterdfClean = left_points_in_poly.merge(right_points_in_poly, left_on='CSA2010', right_on='CSA2010', how='left')
{% endraw %} {% raw %}
scooterdfClean.columns
{% endraw %} {% raw %}
right_points_in_poly.columns
Index(['OBJECTID', 'CSA2010', 'tpop10', 'male10', 'female10', 'paa17',
       'pwhite17', 'pasi17', 'p2more17', 'ppac17', 'phisp17', 'racdiv17',
       'age5_17', 'age18_17', 'age24_17', 'age64_17', 'age65_17', 'hhs10',
       'femhhs17', 'fam17', 'hhsize10', 'mhhi17', 'hh25inc17', 'hh40inc17',
       'hh60inc17', 'hh75inc17', 'hhm7517', 'hhpov17', 'hhchpov17',
       'Shape__Area', 'Shape__Length', 'geometry', 'pointsinpolygon'],
      dtype='object')
{% endraw %} {% raw %}
right_points_in_poly.head()
{% endraw %} {% raw %}
right_points_in_poly.plot('pointsinpolygon', legend=True)
<matplotlib.axes._subplots.AxesSubplot at 0x7fca55083ac8>
{% endraw %} {% raw %}
import altair as alt
import geopandas as gpd
import gpdvega
# GeoDataFrame could be passed as usual pd.DataFrame
chart = alt.Chart(right_points_in_poly).mark_geoshape(
).project(
).encode(
    color='pointsinpolygon', # shorthand infer types as for regular pd.DataFrame
    tooltip=['CSA2010','pointsinpolygon'] # GeoDataFrame.index is accessible as id
).properties(
    width=500,
    height=300
) 

routes = alt.Chart(gdf).mark_geoshape(
    filled=False,
    strokeWidth=2
)

chart + routes

chart.save(fileName[:-8]+'.html', embed_options={'renderer':'svg'})
chart.save(fileName[:-8]+'.json')
{% endraw %}

Inspect Origins-Destinations Data

{% raw %}
ls 'Trip origins-destinations by month'
{% endraw %} {% raw %}
#@markdown Forms support many types of fields.
fileName = 'Trip Destinations by block August 2019.geojson'  #@param ['Trip Destinations by block August 2019.geojson', 'Trip Destinations by block December 2019.geojson', 'Trip Destinations by block November 2019.geojson', 'Trip Destinations by block October 2019.geojson', 'Trip Destinations by block September 2019.geojson', 'Trip Origins by block August 2019.geojson', 'Trip Origins by block December 2019.geojson', 'Trip Origins by block November 2019.geojson', 'Trip Origins by block October 2019.geojson', 'Trip Origins by block September 2019.geojson']
columnName = "name"  #@param ['name', 'value', 'color', 'radius']

#@markdown ---

gdf = gpd.read_file( findFile('./', fileName) )

gdf.plot( column = columnName)
gdf.columns
gdf[['id','name', 'value', 'color', 'radius']].head(5)
{% endraw %} {% raw %}
dxp.bar(x='color', y='value', data=gdf, aggfunc='median')
{% endraw %} {% raw %}
dxp.scatter(x = "color", y = "value", data = gdf, aggfunc = "median")
{% endraw %}